/*
Do-file name: 5_DescriptivesAKM.do
Author: Thiago
Date: Nov 7th 2018
This version: April 19th 2021
Comments: This do-file performs all descriptive statistics for Judges, courts, decisions, etc... using CNJ
productivity data. Its output includes several graphs and tables, as well as a clean dataset used to estimate
the AKM model.
*/

clear all
set more off

* Start from the cleaned CNJ dataset; everything below is merged onto it.
use "$temp/CnjDataset_clean.dta", clear

**** Add RAIS information ****
* Stage the judge-characteristics file in a tempfile, renaming the key to Judge_idP
* and gender to gender_RAIS so the source of that variable stays identifiable.
tempfile judges
preserve
	use "$data_input/identified/data_judges_characteristics.dta", clear
	rename (id_judge gender) (Judge_idP gender_RAIS)
	save "`judges'"
restore

* Many rows per judge in memory, one row per judge in the staged file. Rows found only
* in the staged file are discarded and no merge indicator is left behind.
merge m:1 Judge_idP using "`judges'", keep(master match) nogenerate

**** Add amenities information ****
* Attach municipality-level information by Munic_code3. Municipalities present only in
* the amenities file are not kept, and the merge indicator is discarded.
merge m:1 Munic_code3 using "$data_input/mun_amenities.dta", keep(master match) nogenerate
drop city state

* Log transforms of the municipality covariates.
generate log_population = ln(ibge_res_pop)
generate log_gdp        = ln(gdp_capita)
label variable log_population "Log population (2010)"
label variable log_gdp        "Log GDP per capita (2016)"


**** Add Concurso information ****
* Reduce the judges dataset to a single row per Judge_idP (forced de-duplication, so
* which of several duplicate rows survives is decided by gduplicates) and stage it.
tempfile temp
preserve
	use "$temp/JudgesDataset_clean.dta", clear
	gduplicates drop Judge_idP, force
	save "`temp'"
restore

* Keep every row currently in memory; rows found only in the judges dataset are discarded.
merge m:1 Judge_idP using "`temp'", keep(master match)

* match_sample = 1 when the judge was found in the judges dataset, 0 otherwise.
generate match_sample = (_merge == 3)
drop _merge

****************************************************************************************************************
********************************  SECTION 1: DESCRIPTIVE STATS FOR FULL SAMPLE  ********************************
****************************************************************************************************************

*Monthly totals of sentences and monthly observation counts (country-wide, then by State).
*No graph is drawn here: the code only builds the series and summarizes the national one.
*Each total/count is kept on ONE row per group and set to missing elsewhere, so that
*summarizing the variable gives one data point per month (or per month-State).
sort YM
gegen tot_YM = total(Sent_Trial_Merits_J), by(YM)
*The by-prefixed statements below rely on the data still being sorted by YM.
by YM: gen number = _n
replace tot_YM = . if number!=1
gstats summ  tot_YM													//average sentences per month in the whole country

*Number of rows per month, again kept only on the first row of each month
by YM: gen obs_month = _N
replace obs_month = . if number!=1

*Same two series at the month-State level
sort YM State

gegen tot_YM_state = total(Sent_Trial_Merits_J), by(YM State)
by YM State: gen number_state = _n
replace tot_YM_state = . if number_state!=1

by YM State: gen obs_month_state = _N
replace obs_month_state = . if number_state!=1
sort YM
	
*Constant equal to 1 on every row; it appears in the descr_n and slides variable lists,
*where its count gives the number of observations.
gen unit = 1

*Generate variable indicating in how many courts judges work, and how many judges work in a given court.
*Both are row counts within the judge-month / court-month cell, so they equal the number of
*courts / judges only if each judge-court-month appears once (TODO confirm uniqueness upstream).
bys Judge_idP YM: gen total_courts_overall = _N
bys Court_idP YM: gen total_judges_overall = _N

*Hearings: we only have EITHER presided or held for each observation - if one is there the other is missing. Take total
*The "mis" option is passed so that the row sum is presumably missing (not 0) when both components are missing - confirm.
gegen Hearing_J = rsum(Hearings_Presided_J Hearings_Held_J), mis
lab var Hearing_J "Total Hearings (presided or held)"
lab var Sent_Trial_Merits_J "Cases Disposed (on merit)"

****************************************************************************************************************
********************************  SECTION 2: DEFINING ANALYSIS SAMPLE ********* ********************************
****************************************************************************************************************
		
*1. Trimming outcome variables: values above a variable's own 99th percentile are set to
*   missing. The row itself stays in the data unless the main outcome is affected (step 2).
*   Hearing_J is in the list as well, so it is trimmed on its own distribution rather than
*   being recomputed from its trimmed components.
foreach var of varlist 	Hearings_Presided_J Hearings_Held_J Sent_Trial_Merits_J Decisions_J Sent_Homologation_Agreements_J Sent_Extinction_Punishment_J Orders_J Hearing_J {
	qui gstats summ `var', d
	* Stata ranks missing values above every number, so a bare "greater than p99" test is
	* also true for missing cells. The missing() guard restricts the replacement to real outliers.
	replace `var' = . if `var' > r(p99) & !missing(`var')
}

*2. Drop observations for which our main outcome is missing (missing at source or trimmed above).
*   missing() also catches extended missing codes (.a to .z), which a comparison with "." lets through.
keep if !missing(Sent_Trial_Merits_J)

*After dropping missing Sent_Trial_Merits_J, recompute stats
*(5a_Descriptive_AKM_CreateVars.do is not shown here; presumably it creates the derived
* descriptive variables used in the rest of this file - confirm against that script.)
do "$do/5a_Descriptive_AKM_CreateVars.do"

*3. I'll create different indicators for samples, summarize variables to compare, and then go back to dropping observations
sort Judge_Court_idP
gegen tot_pair = max(period_by_judge_court), by(Judge_Court_idP)

*sample1: judge-court pairs whose largest period_by_judge_court is at least 3.
*Stata treats missing as larger than any number, so "x >= 3" is TRUE when x is missing.
*The explicit missing() guard keeps rows with an unknown count out of the sample.
gen sample1 = tot_pair >= 3 & !missing(tot_pair)

*sample2: rows whose duration_spell_tot is at least 3 (same guard against missing values).
*This is the flag used later to define the estimation sample.
gen sample2 = duration_spell_tot >= 3 & !missing(duration_spell_tot)

*Generate connected sets
*Court ids are negated before being passed to group_twoway, presumably so that court and
*judge identifiers can never coincide (TODO confirm against group_twoway's requirements).
gen Court_idP2 = -Court_idP
group_twoway Court_idP2 Judge_idP, gen(connected_sets)

*One non-missing id per connected set, so that counting cs_unique counts sets.
bys connected_sets: gen cs_unique = connected_sets if _n==1

*Flag, within each State, the connected set with the most rows (ties are all flagged).
bys State connected_sets : gen count_group = _N
gegen max_group = max(count_group), by(State)
gen dummy_largest = (max_group == count_group)


*Variable lists for the descriptive tables. Several names (male_f, court_by_judge, Unique_Judge, ...)
*are not created in this file; presumably they come from 5a_Descriptive_AKM_CreateVars.do - confirm.
*descr_stats_short + descr_court_short: rows of the mean tables (full, estimation and matched samples).
*descr_n: variables whose non-missing COUNT is reported (numbers of judges, courts, pairs, spells, sets, obs).
global descr_stats_short "male_f court_by_judge period_by_judge courts_month Unique_munic_judge  period_by_judge_court judge_by_court judges_month"
global descr_court_short "type_court_short_3_f type_court_short_1_f  type_court_short_5_f type_court_short_2_f  type_court_short_4_f type_court_short_6_f Sent_Trial_Merits_J Hearing_J"
global descr_n "Unique_Judge judge_multiple Unique_Court court_multiple Unique_Judge_Court   unique_spell cs_unique unit"


*descr_stats, slides and slides2 are defined here but not referenced anywhere else in this do-file;
*descr_stats_slide and descr_court_slide are only used for the stored results sample_slide0 / court_slide0 below.
global descr_stats "male_f court_by_judge courts_month district_month period_by_judge maxrun_judge max_districts Unique_munic_judge promotion_unique judge_by_court type_court_short_*_f judges_month Allocated_C_court Process_Ongoing_C_court Process_Await_Compliance_C_court Process_Await_Progress_C_court period_by_judge_court maxrun duration_spell Sent_Trial_Merits_J Decisions_J Sent_Homologation_Agreements_J Sent_Extinction_Punishment_J Orders_J Hearings_Presided_J Hearings_Held_J Hearing_J"
global descr_stats_slide "male_f period_by_judge court_by_judge courts_month  maxrun_judge "
global descr_court_slide "type_court_short_3_f type_court_short_1_f  type_court_short_5_f type_court_short_2_f  type_court_short_4_f type_court_short_6_f court_level_1_f court_level_2_f court_level_3_f court_level_4_f court_level_5_f"
global slides "Unique_Judge judge_multiple Unique_Court court_multiple unit"
global slides2 "court_by_judge courts_month  judge_by_court judges_month period_by_judge_court"


*Descriptive statistics on the full sample (results stored with suffix 0), i.e. after trimming and
*dropping missing outcomes but BEFORE any of the sample restrictions applied further down.
*sample0 and nb_sample0 feed the comparison table exported at the end of this file;
*CS_sample0 stores, by State, the share of rows that lie in the State's largest connected set.
eststo CS_sample0:    estpost tabstat dummy_largest, statistics(mean) by(State) listwise columns(statistics)
eststo sample0:       estpost su $descr_stats_short $descr_court_short
eststo nb_sample0:    estpost su $descr_n
eststo sample_slide0: estpost su $descr_stats_slide
eststo court_slide0:  estpost su $descr_court_slide

*The connected-set helpers were only needed for the full-sample statistics; they are rebuilt
*on the restricted sample further down. Court_idP2 is dropped and recreated immediately, which
*only moves it to the end of the variable list.
drop Court_idP2 connected_sets count_group max_group dummy_largest cs_unique

gen Court_idP2        = -Court_idP
*Calendar components from YM (dofm() converts a monthly date into a daily date).
gen date              = dofm(YM)
gen year              = year(date)
gen month             = month(date)

*Inverse hyperbolic sine (IHS) transforms of the outcomes.
*await_100days_IHS previously used asin(), the arcsine, which is missing for any value
*outside [-1,1]; asinh() is the IHS used for every other *_IHS variable.
gen sentence_IHS      = asinh(Sent_Trial_Merits_J)
gen hearing_IHS       = asinh(Hearing_J)
gen await_100days_IHS = asinh(Process_Await_Sent_J)
gen homologacao_IHS   = asinh(Sent_Homologation_Agreements_J)
gen extinction_IHS    = asinh(Sent_Extinction_Punishment_J)

*Mean of period_by_judge_court within each judge-court pair.
gegen period_by_judge_court_all = mean(period_by_judge_court), by(Judge_Court_idP)

save "$temp/judges_intermediary.dta", replace

/*
Export for the linear fixed-effects (AKM) estimation, which is run in R: the selected
columns are written to .csv and read from R afterwards.
This is the FULL sample - the baseline preferred sample is exported later.
Exporting does not modify the data in memory, so no preserve/restore is needed.
*/
local fe_export_vars Judge Court_name State Judge_idP Court_idP2  YM year month  ///
	Sent_Trial_Merits_J Hearing_J sentence_IHS  hearing_IHS total_courts_overall total_judges_overall months_judge_court ///
	Judge_Court_idP duration_spell_tot period_by_judge_court_all await_100days_IHS homologacao_IHS extinction_IHS

export delimited `fe_export_vars' using "$temp/Intermed_FE_FULL_SAMPLE_R.csv", replace

**************************************************************************************************************
*************************             DEFINING ANALYSIS SAMPLE      ******************************************
**************************************************************************************************************

*2. Restrict to rows flagged by sample2. Note that sample2 is built from duration_spell_tot
*   (value of at least 3), not from the judge-court pair count used for sample1.
keep if sample2 == 1

*Two courts (ids 4620 and 3821) are removed by hand: jointly they have fewer than 50 obs
*and the AKM estimation does not return their fixed effects (reason unclear).
drop if inlist(Court_idP, 4620, 3821)

*Rebuild the connected sets on the restricted sample and keep, within each State, the set
*with the most rows (sets tied for the maximum would all be kept).
quietly group_twoway Court_idP2 Judge_idP, gen(connected_sets)
bysort State connected_sets: gen count_group = _N
gegen max_group = max(count_group), by(State)
gen dummy_largest = (max_group == count_group)
keep if dummy_largest == 1

*One non-missing id per remaining connected set, so that counting cs_unique counts sets.
bysort connected_sets: gen cs_unique = connected_sets if _n==1

******************************************************************************
****** FIGURE A1: Average number of cases disposed, by type of court *********
******************************************************************************
* Mean of Sent_Trial_Merits_J per court type; bars are ordered by the plotted mean.
graph bar (mean) Sent_Trial_Merits_J, ///
	over(type_court, label(angle(45)) sort(1)) ///
	bar(1, color(dknavy)) ///
	ytitle("Mean # Decisions") ylabel(, nogrid) ///
	graphregion(color(white))
graph export "$paper_figures/CourtType_sentences.pdf", replace

******************************************************************************
***************** FIGURE A2: Share of judges matched by State ****************
******************************************************************************
* match_sample is kept on one row per judge and is missing on all others, so the bar
* means are taken over judges rather than over all rows.
bysort Judge_idP: gen unique_state = match_sample if _n==1

* NOTE(review): the horizontal reference line at .265 is hard-coded; presumably it is the
* overall matched share - confirm and recompute if the sample changes.
graph bar (mean) unique_state, ///
	over(State, label(angle(90)) sort(1)) ///
	bar(1, fcolor(dknavy)) ///
	yline(.265) ///
	ytitle("% Judges in State with grade data") ylabel(, nogrid) ///
	graphregion(color(white))
graph export "$paper_figures/ShareJudgesGrade.pdf", replace

* Correlation between outcomes, exported as a LaTeX fragment
estpost correlate Sent_Trial_Merits_J Sent_Homologation_Agreements_J Hearing_J  Sent_Extinction_Punishment_J Decisions_J, matrix
esttab . using "$paper_tables/correlations_BR_trim.tex", replace ///
	not unstack booktabs nonum ///
	collabels(none) ///
	f nogaps noobs

* Variables that are dropped before every rerun of 5a_Descriptive_AKM_CreateVars.do
* (presumably because that script recreates them on whatever sample is in memory - confirm
* against that script). The list is kept in ONE local so the two reruns below cannot drift apart.
* NOTE: being a local macro, the list only exists when this do-file is run as a whole.
local derived_vars period_by_judge_court total_courts total_judges Unique_Judge court_by_judge judge_multiple Judge_YM ///
		courts_month Unique_Judge_Court Unique_Court  judge_by_court court_multiple Court_YM Court_YM_unique judges_month ///
		Unique_munic_judge Unique_munic court_by_munic   unique_spell duration_spell maxrun maxrun_judge ///
		first_date last_date period_by_judge spell_ident duration_spell_tot _spell _seq _end Allocated_C_court ///
		Process_Ongoing_C_court Process_Await_Compliance_C_court Process_Await_Progress_C_court district_month max_districts ///
		type_court_short_*_f gender_f male_f promotion_max_f court_level_*_f months_judge_court

* Recompute the derived variables on the estimation sample.
drop `derived_vars'
do "$do/5a_Descriptive_AKM_CreateVars.do"

******************************************************************************
******  Table A2: Detailed descriptive statistics in estimation sample  ******
******************************************************************************

estpost su $descr_stats_short $descr_court_short, d
est store indiv
esttab indiv using "$paper_tables/descriptive_BR_SS.tex", replace ///
				refcat(male_f "\textbf{Panel A - Judges}" judge_by_court "\textbf{Panel B - Courts}" ///
				Sent_Trial_Merits_J "\textbf{Panel C - Output measures}", nolabel) ///
				cells("mean(fmt(%12.2fc)) sd p50(fmt(0))count(fmt(%12.0fc))") label booktabs nonum collabels("Mean" "SD"  "Median"  "N") f nogaps noobs

******************************************************************************
*********************	Table 1: Descriptive statistics    *******************
******************************************************************************

* Estimation-sample column of Table 1 (means) and its counts.
eststo sample_final : estpost su $descr_stats_short $descr_court_short
eststo nb_sample_final : estpost su $descr_n

* Exam-matched column: computed on a temporary copy restricted to match_sample == 1,
* then the estimation sample is restored untouched.
preserve

	keep if match_sample == 1

	drop `derived_vars'

	* Re-tag one row per connected set within the matched subsample.
	drop cs_unique
	bys connected_sets: gen cs_unique = connected_sets if _n==1

	do "$do/5a_Descriptive_AKM_CreateVars.do"

	* The if-condition repeats the keep above; it is left in place as a harmless guard.
	eststo sample_match : estpost su $descr_stats_short $descr_court_short if match_sample == 1
	eststo nb_sample_match : estpost su $descr_n if match_sample == 1

restore


*Difference-in-means tests between matched and unmatched rows of the estimation sample.
*The result is stored as sample_ttest but is not exported by any command in this do-file.
eststo sample_ttest : estpost ttest $descr_stats_short $descr_court_short, by(match_sample)


*Comparison table, part 1: means for the full, estimation and exam-matched samples (one column each).
*This call creates/overwrites descriptive_compare.tex.
esttab sample0 sample_final sample_match using "$paper_tables/descriptive_compare.tex", replace ///
		refcat(male_f "\textbf{Panel A: Judges}" judge_by_court "\textbf{Panel B: Courts}"  ///
		Sent_Trial_Merits_J "\textbf{Panel C: Output measures}", nolabel) ///
		cells("mean(fmt(%12.2fc))") label booktabs nonum collabels(none) f  noobs ///
		coeflabels(male_f "Share male judges" court_by_judge "Mean \# courts by judge " courts_month "Mean \# courts at judge-month level" ///
		Unique_munic_judge "Mean \# judicial districts at judge-month level"  period_by_judge_court "Mean \# months per judge-court pair" ///
		judge_by_court "Mean \# of judges by court" judges_month "Mean \# judges at court-month level" type_court_short_3_f "Share civil courts" ///
		type_court_short_1_f "Share general courts"  type_court_short_5_f "Share small-stakes courts" type_court_short_2_f "Share criminal courts" ///
		type_court_short_4_f "Share family court" type_court_short_6_f "Share other courts" period_by_judge "Mean \# months by judge")  ///
		mtitles("\shortstack{Full \\ Sample}" "\shortstack{Estimation \\ Sample}" "\shortstack{Exam matched \\ Sample}")

*Comparison table, part 2: counts (non-missing N of each descr_n variable) for the same three samples,
*appended to the same .tex file, so it must run after the call above.
esttab nb_sample0 nb_sample_final nb_sample_match  using "$paper_tables/descriptive_compare.tex", append ///
		cells("count(fmt(%12.0fc)) ") label booktabs nonum noobs nomtitle  collabels(none) f  ///
		coeflabels(Unique_Judge "Number of judges" Unique_Court "Number of courts" Unique_Judge_Court "Number of judge-court pairs" ///
		judge_multiple "Number judges ever working in multiple courts" court_multiple "Number of courts with multiple judges" ///
		unique_spell "Number of judge-court spells" cs_unique "Number of connected sets" unit "Number of judge-court-month observations")
		 		
				
**********

*Save the estimation sample. The matched-sample restriction was undone by the restore above,
*so this file contains matched and unmatched judges, distinguished by match_sample.
save "$temp/judges_estimation.dta", replace						

